3  Results

library(ggplot2)

Code
library(ggplot2)
library(dplyr)
library(tidyr)
library(naniar)
library(ggmap)
library(lubridate)
library(sf)
library(dygraphs)
library(xts)
library(ggiraph)
library(sf)
library(ggalluvial)
library(patchwork)
library(vcd)
library(forcats)
library(RColorBrewer)
Code
# Load the raw arrest records; the literal strings "(null)" and "N/A" are
# treated as missing values (NA) while reading.
data <- read.csv(
  file = "NYPD_Arrest_Data.csv",
  na.strings = c("(null)", "N/A")
)
Code
### Data Preprocessing step
# Keep only complete rows, then in a single pass: parse the arrest date,
# expand the coded columns into readable labels, coerce the coordinates to
# numeric, and derive the day of the week from the parsed date.
data <- na.omit(data) |>
  mutate(
    ARREST_DATE = as.Date(ARREST_DATE, format = "%m/%d/%Y"),
    # Borough letter -> borough name (any other value becomes NA)
    ARREST_BORO = case_when(
      ARREST_BORO == "B" ~ "Bronx",
      ARREST_BORO == "S" ~ "Staten Island",
      ARREST_BORO == "K" ~ "Brooklyn",
      ARREST_BORO == "M" ~ "Manhattan",
      ARREST_BORO == "Q" ~ "Queens"
    ),
    # Offense level; values other than F/M/V are explicitly set to NA
    LAW_CAT_CD = case_when(
      LAW_CAT_CD == "F" ~ "Felony",
      LAW_CAT_CD == "M" ~ "Misdemeanor",
      LAW_CAT_CD == "V" ~ "Violation",
      .default = NA
    ),
    # Codes 0/1/2 get their own label; every other code is "Non NYPD"
    JURISDICTION_CODE = case_when(
      JURISDICTION_CODE == 0 ~ "Patrol",
      JURISDICTION_CODE == 1 ~ "Transit",
      JURISDICTION_CODE == 2 ~ "Housing",
      .default = "Non NYPD"
    ),
    PERP_SEX = case_when(
      PERP_SEX == "F" ~ "Female",
      PERP_SEX == "M" ~ "Male"
    ),
    Latitude = as.numeric(Latitude),
    Longitude = as.numeric(Longitude),
    # Abbreviated weekday label, computed from the already-parsed ARREST_DATE
    Weekday = wday(ARREST_DATE, label = TRUE, abbr = TRUE)
  )

3.2 Geographical Patterns of Crime in NYC

Code
# Borough boundary polygons used as the base layer of the map
nyc_sf <- read_sf("new-york-city-boroughs.geojson")

# Drop rows whose longitude or latitude is exactly 0 before building point
# geometry (presumably placeholder coordinates -- confirm against the data
# dictionary). Note that this overwrites `data` for all later chunks.
data <- data |>
  filter(Longitude != 0 & Latitude != 0)
arrest_sf <- st_as_sf(data, coords = c("Longitude", "Latitude"), crs = 4326)

# Total arrests per borough; geometry is dropped so this is a plain summary
borough_arrest_count <- arrest_sf |>
  st_drop_geometry() |>
  group_by(ARREST_BORO) |>
  summarise(total_arrests = n())

# Attach the counts to the borough polygons and build the hover text.
# The join relies on the GeoJSON `name` values matching the ARREST_BORO labels.
nyc_sf <- nyc_sf |>
  left_join(borough_arrest_count, by = c("name" = "ARREST_BORO")) |>
  mutate(tooltip = paste(name, "<br>Total Arrests:", total_arrests))

# Layer 1: interactive borough polygons (tooltip on hover).
# Layer 2: one hollow red marker per arrest.
interactive_map <- ggplot() +
  # Polygon outline width is set with `linewidth`; in ggplot2 >= 3.4.0 `size`
  # only affects point geometries in geom_sf, so `size = 0.3` left the borough
  # borders at their default width.
  geom_sf_interactive(data = nyc_sf,
                      aes(fill = name, geometry = geometry, tooltip = tooltip),
                      color = "black", linewidth = 0.3, alpha = 0.5) +
  geom_sf(data = arrest_sf, aes(geometry = geometry),
          color = "red", size = 0.05, alpha = 0.4, stroke = 0.3, shape = 1) +
  labs(title = "Arrest Locations in NYC", 
       x = "Longitude", y = "Latitude", fill = "Borough") +
  coord_sf() +
  theme_minimal() +
  theme(plot.title = element_text(size = 16, face = "bold"),
        plot.subtitle = element_text(size = 12, face = "italic"),
        legend.title = element_text(size = 12),
        legend.text = element_text(size = 10),
        plot.margin = margin(1, 1, 1, 1, "cm"))

# Render the ggplot object as an interactive widget
girafe(ggobj = interactive_map)

Brooklyn has the highest number of arrests (53,987), with widespread clusters, particularly in its northern and eastern neighborhoods. These areas may experience higher crime rates due to concentrated urban activity and demographic factors.

Manhattan follows with 46,226 arrests, heavily concentrated in its central and southern regions, including areas like Midtown and Downtown. The high urban density, commercial activity, and large influx of daily commuters likely contribute to this concentration. The Bronx, with 43,896 arrests, shows dense clusters in its central and southern areas, reflecting persistent socio-economic challenges and a relatively high population density.

Queens accounts for 42,969 arrests, characterized by a more dispersed pattern with prominent clusters in neighborhoods like Jamaica and Flushing. The suburban nature of Queens contrasts with the dense urban dynamics of Manhattan and the Bronx. Staten Island, with only 8,341 arrests, has the fewest arrests of any borough, reflecting its smaller population and suburban characteristics, with most arrests concentrated in the northern part of the borough.

Code
# Load the police precinct polygons (st_read prints a layer summary)
nyc_precincts <- st_read(dsn = "Police Precincts.geojson")
Reading layer `Police Precincts' from data source 
  `/Users/xusiming/Desktop/24-Fall/5702/Final Project/nypd-arrest-visualization/Police Precincts.geojson' 
  using driver `GeoJSON'
Simple feature collection with 77 features and 3 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -74.25559 ymin: 40.49613 xmax: -73.70001 ymax: 40.91553
Geodetic CRS:  WGS 84
Code
# Convert the precinct id to integer so it can be joined against
# ARREST_PRECINCT (assumed to be read as integer from the CSV -- confirm)
nyc_precincts[["precinct"]] <- as.integer(nyc_precincts[["precinct"]])

# Number of arrest records per precinct
precinct_arrests <- data |>
  group_by(ARREST_PRECINCT) |>
  tally(name = "Count")

# Attach the counts to the precinct polygons; precincts without a matching
# row keep an NA Count
precinct_arrests_map <- left_join(
  nyc_precincts,
  precinct_arrests,
  by = c("precinct" = "ARREST_PRECINCT")
)

# Choropleth: each precinct filled by its arrest count
ggplot(precinct_arrests_map, aes(fill = Count)) +
  geom_sf(color = "white") +
  scale_fill_viridis_c() +
  labs(
    title = "Arrests by Police Precinct in NYC",
    fill = "Number of Arrests"
  ) +
  theme_minimal()